library(raster)
## Loading required package: sp
library(rgdal)
## Warning: package 'rgdal' was built under R version 3.5.2
## rgdal: version: 1.4-8, (SVN revision 845)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.4.2, released 2019/06/28
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/gdal
## GDAL binary built with GEOS: FALSE
## Loaded PROJ.4 runtime: Rel. 5.2.0, September 15th, 2018, [PJ_VERSION: 520]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.5/Resources/library/rgdal/proj
## Linking to sp version: 1.3-2
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:raster':
##
## intersect, select, union
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(readr)
library(ggplot2)
library(pophelper)
## pophelper v2.3.1 ready.
library(scatterpie)
##
## Attaching package: 'scatterpie'
## The following object is masked from 'package:sp':
##
## recenter
library(ggnewscale)
library(gridExtra)
##
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
##
## combine
First plot CV
### parvi
parvi_CV<-read.delim("../data/genetic/output/admixture/parvi/Kerror_parviglumis.txt",
sep=" ", header=FALSE, stringsAsFactors = FALSE) %>%
mutate(K = parse_number(V3)) %>%
dplyr::select(K, V4) %>% rename(CV=V4)
## Plot CV
p <- ggplot(parvi_CV, aes(x=K, y=CV)) + geom_point(size=1) + geom_line()
p + theme(axis.title.x = element_text(face="bold", size=20),
axis.text.x = element_text(angle=90, vjust=0.5, size=16),
axis.title.y = element_text(face="bold", size=20),
axis.text.y = element_text(size=16)) +
ggtitle(label = "Z. m. parviglumis")
The CV error of the Admixture analysis doesn't show a clear K value, so we chose those K values where the slope changes for each species. For ** Z. m. parviglumis** this is K=13 and K=25.
Load admixture Q data and samples metadata
# get data
parvifiles<-c("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.25.Q",
"../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q")
parvi_list<- readQ(parvifiles)
# Check
attributes(parvi_list)
## $names
## [1] "bytaxa_parviglumis.25.Q" "bytaxa_parviglumis.13.Q"
# Get metadata
meta<-read.delim("../data/genetic/output/ADN_pasap_3604.txt")
parviPlinksamples<-read.table("../data/genetic/output/bytaxa_parviglumis.fam")
parvimeta<-meta[meta$POBL %in% parviPlinksamples$V2, ]
levels(parvimeta$ESTADO)[13]<-"EdoMex"
levels(parvimeta$ESTADO)[14]<-"Michoacan"
# add sample metadata to qlist
attributes(parvi_list[[1]])$row.names<-as.character(parvimeta$POBL)
attributes(parvi_list[[2]])$row.names<-as.character(parvimeta$POBL)
# drop unused levels
parvimeta<-droplevels(parvimeta)
Plot Qvals of both Ks of interest:
p1 <- plotQ(alignK(parvi_list[1:2]),
sortind = "all",
imgoutput="join", returnplot=T, sharedindlab=FALSE,
exportplot=F,basesize=11)
grid.arrange(p1$plot[[1]])
Plot only K=13 ordering by all genetic clusters
p1<-plotQ(parvi_list[2],
returnplot=T,
exportplot=F,
sortind = "all", basesize = 11,
# showindlab= TRUE, useindlab=TRUE, indlabangle=90, indlabsize=0.1,
exportpath = "../data/genetic/output/admixture/parvi")
plot(p1$plot[[1]])
Scatterpies of admixture groups
# Read raw Qval file
Qval<-read.table(paste0("../data/genetic/output/admixture/parvi/bytaxa_parviglumis.13.Q"))
names(Qval)<-paste0("K", 1:ncol(Qval))
# add metadata
Qval<-cbind(parvimeta$POB, parvimeta$INDIV, parvimeta$LONGITUDE, parvimeta$LATITUDE, Qval)
names(Qval)[1:4]<-c("POB", "INDIV", "LONGITUDE", "LATITUDE")
# generate same colors than the ones used for admixture plot
piecolors<-gplots::rich.colors(13)
# plot scatterpie
ppie1<- ggplot() + geom_scatterpie(data=Qval,
aes(x=LONGITUDE,
y=LATITUDE,
group=POB),
color=NA,
cols=paste0("K", 1:13)) +
scale_fill_manual(values= piecolors,
name="Genetic cluster") + theme_bw()
ppie1
Adding populations names:
# plot scatterpie
ppie1 + geom_text(data=parvimeta,
aes(x=parvimeta$LONGITUDE,
y=parvimeta$LATITUDE,
label=POB),
color="grey40",
check_overlap = T,
hjust = 0, vjust=1, nudge_x = 0.0005,
size= 5.5)
## Warning: Use of `parvimeta$LONGITUDE` is discouraged. Use `LONGITUDE` instead.
## Warning: Use of `parvimeta$LATITUDE` is discouraged. Use `LATITUDE` instead.
Plot ordering populations in geographical order
# Get longitudinal order
df<-data.frame(POB=parvimeta$POB, LONGITUDE=parvimeta$LONGITUDE) %>%
unique() %>%
arrange(LONGITUDE)
unique(df$POB)
## [1] BVPU BGUA BQUE BEJU BMAN BSAU BTAR BTAC BNOC BRED BCAR BHUE BTZI BTIQ BTUZ
## [16] BJUA BTAL BPCH BZUL BOTZ BVBR BTEJ BTEL BZAC BAPX BIXC BTCT BMAZ MALI BCOL
## [31] BHUI BMOR BOLI BOAX
## 34 Levels: BAPX BCAR BCOL BEJU BGUA BHUE BHUI BIXC BJUA BMAN BMAZ BMOR ... MALI
# set levels of Pops in longitudinal order
desired_order<-unique(df$POB)
parvimeta$POBorder<-factor(parvimeta$POB, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(parvimeta$POBorder)<-paste0(1:34, levels(parvimeta$POBorder))
levels(parvimeta$POBorder)[1:9]<-paste0("0", levels(parvimeta$POBorder)[1:9])
# set labels
labset2<-data.frame(POBorder=as.character(parvimeta$POBorder),
ESTADO=as.character(parvimeta$ESTADO), stringsAsFactors = FALSE)
#plot
p<-plotQ(parvi_list[2],
returnplot=T,
exportplot=F,
sortind = "all",
grplab=labset2,
ordergrp=TRUE,
grplabangle= 45, grplabsize=10, linesize=2, divsize = 2, grplabheight= 2, grplabpos=0.5,
exportpath = "../data/genetic/output/admixture/parvi")
plot(p$plot[[1]])
Extract in which PGD do the sampled individuals fall. This would be used to then add this information to the plot and see how well PGD correspond to genetic clusters.
First load raster data of Zea mays spp parviglumis SDM and the PGD that fell within it:
# path to rasters
my_rasters<-c("../data/spatial/modelosDarwinZea/Zea_mays_parviglumis.tif", # SDM
"../data/spatial/areasProxyDivGen/crop_to_sp/PGD_Zea_mays_parviglumis.tif") #SDM subdivied by Proxies of gen div
# stack rasters and add nicer name
parvi_rasters<-stack(my_rasters)
# check
names(parvi_rasters)
## [1] "Zea_mays_parviglumis" "PGD_Zea_mays_parviglumis"
# Check projection
proj4string(parvi_rasters$PGD_Zea_mays_parviglumis)
## [1] "+proj=merc +lon_0=0 +k=1 +x_0=0 +y_0=0 +datum=WGS84 +units=m +no_defs +ellps=WGS84 +towgs84=0,0,0"
# change projection to longlat for nicer visualization
newcrs <- CRS("+proj=longlat +datum=WGS84 +no_defs +ellps=WGS84 +towgs84=0,0,0")
parvi_rasters<-projectRaster(parvi_rasters, crs=newcrs, method="ngb")
parvi_rasters<-stack(parvi_rasters)
Z. m. parviglumis has 29 PGD distributed like this:
# Get number of cells of each PDG
table(getValues(parvi_rasters$PGD_Zea_mays_parviglumis))
##
## 0 2 5 6 7 8 10 11 15 20
## 2242616 395 8786 6753 3 9641 425 2954 5340 1025
## 21 23 25 32 36 37 38 39 40 41
## 1070 92 2237 359 7557 31658 1731 393 4462 201
## 42 43 48 58 84 86 90 92 93
## 4 8556 5913 34 1290 6 501 239 50
Plot PGD and sampling points of genetic data.
# crop raster to make it more managable
parvi_small<- raster::crop(parvi_rasters$PGD_Zea_mays_parviglumis,
y=extent(c(-107, -96 , 15.5, 22)))
# convert to df to plot with ggplot
raster_df<-as.data.frame(rasterToPoints(parvi_small, spatial = TRUE))
raster_df$PGD_Zea_mays_parviglumis<-as.character(raster_df$PGD_Zea_mays_parviglumis)
# get nice colors
mycols<-c("grey40","#917031", "#4d6cd0", "#8bbb39", "#c556c5", "#53d06a", "#895dc8", "#42a331", "#d84492", "#69b974", "#d63c48", "#4dc0b1", "#d3542b", "#4b9cd3", "#d2a337", "#9289cf", "#b1b23a", "#9a5398", "#498331", "#cc4668", "#37835d", "#df89be", "#6e7721", "#9b496e", "#b9b06d", "#ac575b", "#65743a", "#e78b7a", "#db873d", "#a2552d")
# get labels with 0 as "NA" (it is not really NA, just the part of Mx out of the species range). True NA is outside of Mx.
mylabs<-unique(getValues(parvi_rasters$PGD_Zea_mays_parviglumis))[2:length(unique(getValues(parvi_rasters$PGD_Zea_mays_parviglumis)))] # ommit the first (true NA)
mylabs[1]<-"NA" # change 0 to "NA"
mylabs
## [1] "NA" "5" "32" "42" "10" "86" "90" "36" "20" "48" "92" "25" "2" "11" "37"
## [16] "6" "21" "43" "84" "93" "8" "38" "15" "23" "40" "58" "41" "39" "7"
# plot PGD raster
p<- ggplot() +
geom_raster(data = raster_df , aes(x = x, y = y, fill = PGD_Zea_mays_parviglumis)) +
scale_fill_manual(values= alpha(mycols, 0.5) , name="PGD",
labels=mylabs) + # no prevent the area outside the SDM to appear as a PDG 0
theme_bw() +
labs(x="", y= "") + theme(text = element_text(size = 25))
# add sampling points
p + geom_point(data=parvimeta, aes(x=LONGITUDE,
y=LATITUDE))
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.
Plot scatterpies with genetic groups
p_piemap<-p + new_scale("fill") + # this is needed because raster and pie have diff fills
geom_scatterpie(data=Qval,
aes(x=LONGITUDE,
y=LATITUDE,
group=POB),
color="NA",
cols=paste0("K", 1:13)) +
scale_fill_manual(values= piecolors,
name="Genetic cluster") +
labs(x="", y= "") + theme(text = element_text(size = 25))
p_piemap
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.
Extract in which PGD each genetic sample falls:
# Create function to estimate the mode, omitting na values
Mode <- function(x) {
ux <- unique(na.omit(x))
ux[which.max(tabulate(match(x, ux)))]
}
# transform 0 value (out of sp SDM range) to NA so that only PGD are considered
parvi_rasters$PGD_Zea_mays_parviglumis<-reclassify(parvi_rasters$PGD_Zea_mays_parviglumis,
rcl=c(0, .1, NA), include.lowest=TRUE)
# extract the PDG most frequent in a given buffer radio of the sampling point
# ignoring NA values (see Mode fun)
PGD_points<-raster::extract(parvi_rasters$PGD_Zea_mays_parviglumis,
buffer=2500, fun=Mode, # so that no points fell in only NAs
y=data.frame(long=parvimeta$LONGITUDE,
lat=parvimeta$LATITUDE)) %>%
as.character()
PGD_points<-parvimeta %>% dplyr::select(LONGITUDE, LATITUDE, DNASample, POB, POBorder, POBL) %>%
mutate(PGD_points=PGD_points)
table(PGD_points$PGD_points)
##
## 10 11 15 36 37 40 41 43 48 5 6 8 84
## 37 54 12 130 707 68 42 276 33 205 112 62 5
# check if there are samples that did not fell in any PGD
sum(is.na(PGD_points$PGD_points))
## [1] 26
PGD_points[is.na(PGD_points$PGD_points), ]
## LONGITUDE LATITUDE DNASample POB POBorder POBL PGD_points
## 1078 -99.28528 16.98056 186_10 BTCT 27BTCT BTCT_186_10 <NA>
## 1079 -99.28528 16.98056 186_11 BTCT 27BTCT BTCT_186_11 <NA>
## 1080 -99.28528 16.98056 186_12 BTCT 27BTCT BTCT_186_12 <NA>
## 1081 -99.28528 16.98056 186_13 BTCT 27BTCT BTCT_186_13 <NA>
## 1082 -99.28528 16.98056 186_14 BTCT 27BTCT BTCT_186_14 <NA>
## 1083 -99.28528 16.98056 186_16 BTCT 27BTCT BTCT_186_16 <NA>
## 1084 -99.28528 16.98056 186_17 BTCT 27BTCT BTCT_186_17 <NA>
## 1085 -99.28528 16.98056 186_18 BTCT 27BTCT BTCT_186_18 <NA>
## 1086 -99.28528 16.98056 186_19 BTCT 27BTCT BTCT_186_19 <NA>
## 1087 -99.28528 16.98056 186_1 BTCT 27BTCT BTCT_186_1 <NA>
## 1088 -99.28528 16.98056 186_20 BTCT 27BTCT BTCT_186_20 <NA>
## 1089 -99.28528 16.98056 186_21 BTCT 27BTCT BTCT_186_21 <NA>
## 1090 -99.28528 16.98056 186_22 BTCT 27BTCT BTCT_186_22 <NA>
## 1091 -99.28528 16.98056 186_23 BTCT 27BTCT BTCT_186_23 <NA>
## 1092 -99.28528 16.98056 186_24 BTCT 27BTCT BTCT_186_24 <NA>
## 1093 -99.28528 16.98056 186_25 BTCT 27BTCT BTCT_186_25 <NA>
## 1094 -99.28528 16.98056 186_26 BTCT 27BTCT BTCT_186_26 <NA>
## 1095 -99.28528 16.98056 186_27 BTCT 27BTCT BTCT_186_27 <NA>
## 1096 -99.28528 16.98056 186_28 BTCT 27BTCT BTCT_186_28 <NA>
## 1097 -99.28528 16.98056 186_29 BTCT 27BTCT BTCT_186_29 <NA>
## 1098 -99.28528 16.98056 186_2 BTCT 27BTCT BTCT_186_2 <NA>
## 1099 -99.28528 16.98056 186_30 BTCT 27BTCT BTCT_186_30 <NA>
## 1100 -99.28528 16.98056 186_4 BTCT 27BTCT BTCT_186_4 <NA>
## 1101 -99.28528 16.98056 186_5 BTCT 27BTCT BTCT_186_5 <NA>
## 1102 -99.28528 16.98056 186_7 BTCT 27BTCT BTCT_186_7 <NA>
## 1103 -99.28528 16.98056 186_9 BTCT 27BTCT BTCT_186_9 <NA>
Save PGD points to be used in PCAdapt plot
write.csv(PGD_points, "../data/genetic/output/PGD_points_parvi.txt")
Remove genetic samples that did not fell in any PGD
# check original n
nrow(PGD_points)
## [1] 1769
# get samples names that FELL in PGD
PGD_points<-PGD_points[!is.na(PGD_points$PGD_points), ]
fell_in_PGD<-as.character(PGD_points$POBL)
# check how many samples we have now
length(fell_in_PGD)
## [1] 1743
# keep only samples that fell in PGD in the admixture data
# get rows matching fell in PGD
x<-attributes(parvi_list[[2]])$row.names %in% fell_in_PGD
# subset list
subset_admix_list<-list(parvi_list[[2]][x,])
names(subset_admix_list)<-names(parvi_list[2])
Use this information to see in which genetic group our PGD fall:
## order PGD
# set levels of PGD more or less in lontigitudinal order
desired_order<-c("48", "36", "10", "5", "15", "11" , "37", "6", "84", "43", "8", "40", "41")
PGD_points$PGD_order<-factor(PGD_points$PGD_points, levels = desired_order)
# change levels so that they sort in that order alphabetically
levels(PGD_points$PGD_order)<-paste0(1:13, "_",levels(PGD_points$PGD_order))
levels(PGD_points$PGD_order)[1:9]<-paste0("0", levels(PGD_points$PGD_order)[1:9])
# set labels
labsetPGD<-data.frame(PGD_points=as.character(PGD_points$PGD_order),
POBorder=as.character(PGD_points$POBorder),
stringsAsFactors = FALSE)
p3<-plotQ(subset_admix_list[1],
returnplot=T,
exportplot=F,
sortind = "all",
grplab=labsetPGD, selgrp = "PGD_points",
ordergrp=T,
grplabangle= 90, grplabsize=8, linesize=3, divsize = 3.5,
exportpath = getwd(), height = 20, width = 80)
plot(p3$plot[[1]])
Omit labels for plot for the paper and save as png:
p3<-plotQ(subset_admix_list[1],
returnplot=T,
exportplot=T,
sortind = "all",
grplab=labsetPGD, selgrp = "PGD_points",
ordergrp=T,
grplabangle= 90, grplabsize=8, divsize = 1,
showgrplab= FALSE, splab="",
exportpath = getwd(), height = 2.5, width = 15,
outputfilename="../figures/parviglumis13Q_subsetPGD")
## Drawing plot ...
## /Volumes/datos/DarwinUICN/Zonificacion/Zonas_de_vida/analisisUniCons_proxiGen/bin/../figures/parviglumis13Q_subsetPGD.png exported.
Save map for paper plot:
p_piemap<-p + theme(legend.position = "none") +
new_scale("fill") + # this is needed because raster and pie have diff fills
geom_scatterpie(data=Qval,
aes(x=LONGITUDE,
y=LATITUDE,
group=POB),
color="NA",
cols=paste0("K", 1:13)) +
scale_fill_manual(values= piecolors)+
labs(x="", y= "") + theme(text = element_text(size = 15))
# save to file
ggsave("../figures/piemap_parviglumis_pgd.png",
plot=p_piemap, dpi=72,
width=15, height = 10, units="cm")
## Warning: Raster pixels are placed at uneven horizontal intervals and will be
## shifted. Consider using geom_tile() instead.
Get session info:
sessionInfo()
## R version 3.5.1 (2018-07-02)
## Platform: x86_64-apple-darwin15.6.0 (64-bit)
## Running under: OS X El Capitan 10.11.6
##
## Matrix products: default
## BLAS: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRblas.0.dylib
## LAPACK: /Library/Frameworks/R.framework/Versions/3.5/Resources/lib/libRlapack.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats graphics grDevices utils datasets methods base
##
## other attached packages:
## [1] gridExtra_2.3 ggnewscale_0.4.5 scatterpie_0.1.5 pophelper_2.3.1
## [5] ggplot2_3.3.3 readr_1.4.0 dplyr_1.0.2 rgdal_1.4-8
## [9] raster_3.4-5 sp_1.4-4
##
## loaded via a namespace (and not attached):
## [1] gtools_3.8.2 tidyselect_1.1.0 xfun_0.19
## [4] lpSolve_5.6.15 purrr_0.3.4 lattice_0.20-41
## [7] colorspace_2.0-0 vctrs_0.3.5 generics_0.1.0
## [10] htmltools_0.5.0 label.switching_1.8 yaml_2.2.1
## [13] rlang_0.4.9 pillar_1.4.7 glue_1.4.2
## [16] withr_2.3.0 tweenr_1.0.1 rvcheck_0.1.8
## [19] lifecycle_0.2.0 stringr_1.4.0 munsell_0.5.0
## [22] combinat_0.0-8 gtable_0.3.0 caTools_1.17.1.2
## [25] codetools_0.2-18 evaluate_0.14 labeling_0.4.2
## [28] knitr_1.30 Rcpp_1.0.5 KernSmooth_2.23-18
## [31] scales_1.1.1 BiocManager_1.30.10 farver_2.0.3
## [34] gplots_3.1.0 ggforce_0.3.2 hms_0.5.3
## [37] digest_0.6.27 stringi_1.5.3 polyclip_1.10-0
## [40] grid_3.5.1 tools_3.5.1 bitops_1.0-6
## [43] magrittr_2.0.1 tibble_3.0.4 crayon_1.3.4
## [46] tidyr_1.0.2 pkgconfig_2.0.3 ellipsis_0.3.1
## [49] MASS_7.3-53 rmarkdown_2.5 R6_2.5.0
## [52] compiler_3.5.1